## ----------------------------------------------------------
## Validated Participation Functions
## ----------------------------------------------------------

### load packages
require(CBPS, quietly = TRUE)
require(scales, quietly = TRUE)
require(estimatr, quietly = TRUE)
require(tidyverse, quietly = TRUE)
require(gridExtra, quietly = TRUE)
require(stargazer, quietly = TRUE)
require(texreg, quietly = TRUE)
require(xtable, quietly = TRUE)
require(lattice, quietly = TRUE)
require(corrplot, quietly = TRUE)
require(equivalence, quietly = TRUE)
require(sp, quietly = TRUE) #GIS packages
require(spdep, quietly = TRUE)
require(rgdal, quietly = TRUE)
require(maps, quietly = TRUE)
require(mapdata, quietly = TRUE)
require(maptools, quietly = TRUE)
require(ggmap, quietly = TRUE)
require(patchwork, quietly = TRUE)
require(psych, quietly = TRUE)

#### Functions ####

# Correlation test
col <- colorRampPalette(c("#BB4444", "#EE9988", "#FFFFFF", "#77AADD", "#4477AA"))

# Cluster robust SEs for regression
clm <- function(dat, fm, cluster){
  require(sandwich)
  require(lmtest)
  not <- attr(fm$model,"na.action")
  if( ! is.null(not)){
    cluster <- cluster[-not]
    dat <- dat[-not,]
  }
  with(dat,{
    M <- length(unique(cluster))
    N <- length(cluster)
    K <- fm$rank
    dfc <- (M/(M-1))*((N-1)/(N-K))
    uj <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
    vcovCL <- dfc*sandwich(fm, meat=crossprod(uj)/N)
    return(list(regtable = coeftest(fm, vcovCL), 
            vcov = vcovCL))
  }
  )
}

# Predicted Probability function for treatment groups non-mixed effects models
fitpred <- function(fit, fit2, modeltype){ #fit2 is cluster SE adjusted fit

  beta.sim <- mvrnorm(1000, mu = coef(fit), Sigma = fit2$vcov) # MC simulation
  
  matSO <- matIW <- matVP <- as.data.frame(model.matrix(fit)) # model matrix of fit

  matSO$IW <- 0 # new datasets depending on treatment status
  matSO$VP <- 0

  matIW$IW <- 1
  matIW$VP <- 0
    
  matVP$IW <- 0
  matVP$VP <- 1

  if(modeltype == "lm"){ #generate predicted probs depending on modeltype
    
    predSO <- as.matrix(matSO) %*% t(beta.sim)
    predIW <- as.matrix(matIW) %*% t(beta.sim)
    predVP <- as.matrix(matVP) %*% t(beta.sim)
    
    
  } else if(modeltype == "glm"){
    
    logistic <- function(x) exp(x)/(1+exp(x))
    
    predSO <- logistic(as.matrix(matSO) %*% t(beta.sim))
    predIW <- logistic(as.matrix(matIW) %*% t(beta.sim))
    predVP <- logistic(as.matrix(matVP) %*% t(beta.sim))
  
  } else if(modeltype == "polr"){
  
    ordlogistic <- function(x, beta.sim){
    ce <- beta.sim[, c(1:(ncol(beta.sim)-4))] #coefs
    ic <- beta.sim[, c((ncol(beta.sim)-3):ncol(beta.sim))] #intercepts  

    logit1 <- ic[,1] - (as.matrix(x)[,-1] %*% t(ce))
    logit2 <- ic[,2] - (as.matrix(x)[,-1] %*% t(ce))
    logit3 <- ic[,3] - (as.matrix(x)[,-1] %*% t(ce))
    logit4 <- ic[,4] - (as.matrix(x)[,-1] %*% t(ce))
  
    pLeq1  <- 1 / (1 + exp(-logit1))   # p(Y <= 1)
    pLeq2  <- 1 / (1 + exp(-logit2))   # p(Y <= 2)
    pLeq3  <- 1 / (1 + exp(-logit3))   # p(Y <= 3)
    pLeq4  <- 1 / (1 + exp(-logit4))   # p(Y <= 3)
  
    1 * pLeq1 + 2 * (pLeq2 - pLeq1) + 3 * (pLeq3 - pLeq2) + 
    4 * (pLeq4 - pLeq3) + 5 * (1 - pLeq4)}
    
    predSO <- ordlogistic(as.matrix(matSO), beta.sim)
    predIW <- ordlogistic(as.matrix(matIW), beta.sim)
    predVP <- ordlogistic(as.matrix(matVP), beta.sim)
  }  

  #predicted probs estimates and difference of predicted probs
  diffs <- diffs.se <- diffs.L <- diffs.U <- as.matrix(rep(NA, 6))

  diffs[1] <- mean(predSO)
  diffs[2] <- mean(predVP)
  diffs[3] <- mean(predIW)
  diffs[4] <- mean(predVP - predSO)
  diffs[5] <- mean(predIW - predSO)
  diffs[6] <- mean(predVP - predIW)
  
  diffs.se[1] <- sd(predSO)
  diffs.se[2] <- sd(predVP)
  diffs.se[3] <- sd(predIW)
  diffs.se[4] <- sd(apply(predVP - predSO, 2, mean))
  diffs.se[5] <- sd(apply(predIW - predSO, 2, mean))
  diffs.se[6] <- sd(apply(predVP - predIW, 2, mean))
    
  diffs.L <- diffs - (diffs.se * qnorm(0.95))
  diffs.U <- diffs + (diffs.se * qnorm(0.95))
  
  #plot df
  predplot <- as.data.frame(
                  cbind(as.numeric(diffs), 
                  as.numeric(diffs.se), 
                  as.numeric(diffs.L), 
                  as.numeric(diffs.U), 
                  c("SO", 
                    "VP", 
                    "IW",
                    "VP-SO",
                    "IW-SO",
                    "VP-IW"),
                  c(1:6)))
  
  return(predplot)  

}

# Predicted Probability function for EEF scores w/ bootstrapped SEs
fitpredEEF <- function(fit, data, var, nsims = 1000){
  
 store_boot <- matrix(NA, nsims, 5)
 
 for(i in 1:nsims){
  
  ## Sample data, run model
  dat_sim <- data[sample(1:nrow(data), nrow(data), replace = TRUE),]
  
  model_out <- lm_robust(as.formula(fit$call$formula), 
                         alpha = .1,
                         clusters = SchoolID,
                         data = dat_sim)
  
  ## Get predictions
  preds_out <- rep(NA, 5)
  
  for(j in 1:5){
    dat_sim[,var] <- j
    preds <- predict(model_out, dat_sim)
    preds_out[j] <- mean(preds, na.rm = TRUE)
  }
  store_boot[i,] <- preds_out
 }
 
 EEFscore <- 1:5
 est <- apply(store_boot, 2, quantile, probs = .5)
 cil <- apply(store_boot, 2, quantile, probs = .025)
 ciu <- apply(store_boot, 2, quantile, probs = .975)
 cil_90 <- apply(store_boot, 2, quantile, probs = .05)
 ciu_90 <- apply(store_boot, 2, quantile, probs = .95)
 se <- (ciu - est) / qnorm(.975)

  return(as.data.frame(cbind(EEFscore, est, se, cil, ciu, cil_90, ciu_90)))
}

# Tidy function to format regression estimates for ggplot
mytidy <- function(model){
  out <- bind_cols(
    tidy(model) %>%
      mutate(pval_oneside = pt(statistic, df, lower.tail = FALSE)),
    as.data.frame(confint(model, level = .95))
  )
  ## Get difference
  if(!("scaled_center" %in% names(model))){
   diff <- coef(model)["VP"] - coef(model)["IW"]
   se <- sqrt(vcov(model)["VP", "VP"] + vcov(model)["IW", "IW"] - 
         2 * vcov(model)["VP", "IW"]) 
  }else{
   diff <- coef(model)["TreatCatVP"] - coef(model)["TreatCatIW"]
   se <- sqrt(vcov(model)["TreatCatVP", "TreatCatVP"] + 
                vcov(model)["TreatCatIW", "TreatCatIW"] - 
                2 * vcov(model)["TreatCatVP", "TreatCatIW"]) 
  }
  conf.low <- diff - se * qnorm(.95)
  conf.high <- diff + se * qnorm(.95)
  out <- bind_rows(
    out, 
    tibble(term = "diff_vpiw", estimate = diff, 
               std.error = se, conf.low = conf.low, conf.high = conf.high,
           outcome = model$outcome,
               `2.5 %` = diff - se * qnorm(.975),
               `97.5 %` = diff + se * qnorm(.975))
  )
  return(out)
}


# Functions to add a scalebar into a map

createScaleBar <- function(lon,lat,distanceLon,distanceLat,distanceLegend, dist.units = "km"){
	# First rectangle
	bottomRight <- gcDestination(lon = lon, lat = lat, bearing = 90, 
	                             dist = distanceLon, dist.units = dist.units, model = "WGS84")
	
	topLeft <- gcDestination(lon = lon, lat = lat, bearing = 0, dist = distanceLat, 
	                         dist.units = dist.units, model = "WGS84")
	rectangle <- cbind(lon=c(lon, lon, bottomRight[1,"long"], bottomRight[1,"long"], lon),
	lat = c(lat, topLeft[1,"lat"], topLeft[1,"lat"],lat, lat))
	rectangle <- data.frame(rectangle, stringsAsFactors = FALSE)
	
	# Second rectangle t right of the first rectangle
	bottomRight2 <- gcDestination(lon = lon, lat = lat, bearing = 90, 
	                              dist = distanceLon*2, dist.units = dist.units, model = "WGS84")
	rectangle2 <- cbind(lon = c(bottomRight[1,"long"], bottomRight[1,"long"], 
	                            bottomRight2[1,"long"], bottomRight2[1,"long"], bottomRight[1,"long"]),
	lat=c(lat, topLeft[1,"lat"], topLeft[1,"lat"], lat, lat))
	rectangle2 <- data.frame(rectangle2, stringsAsFactors = FALSE)
	
	# Now let's deal with the text
	onTop <- gcDestination(lon = lon, lat = lat, bearing = 0, dist = distanceLegend, 
	                       dist.units = dist.units, model = "WGS84")
	onTop2 <- onTop3 <- onTop
	onTop2[1,"long"] <- bottomRight[1,"long"]
	onTop3[1,"long"] <- bottomRight2[1,"long"]
	
	legend <- rbind(onTop, onTop2, onTop3)
	legend <- data.frame(cbind(legend, text = c(0, distanceLon, distanceLon*2)), 
	                     stringsAsFactors = FALSE, row.names = NULL)
	return(list(rectangle = rectangle, rectangle2 = rectangle2, legend = legend))
}


scaleBar <- function(lon, lat, distanceLon, distanceLat, distanceLegend, dist.unit = "km", 
                     rec.fill = "white", rec.colour = "black", rec2.fill = "gray", 
                     rec2.colour = "black", legend.colour = "black", legend.size = 3){
	laScaleBar <- createScaleBar(lon = lon, lat = lat, distanceLon = distanceLon, 
	                             distanceLat = distanceLat, distanceLegend = distanceLegend, 
	                             dist.unit = dist.unit)
	# First rectangle
	rectangle1 <- geom_polygon(data = laScaleBar$rectangle, aes(x = lon, y = lat), 
	                           fill = rec.fill, colour = rec.colour)
	
	# Second rectangle
	rectangle2 <- geom_polygon(data = laScaleBar$rectangle2, aes(x = lon, y = lat), 
	                           fill = rec2.fill, colour = rec2.colour)
	
	# Legend
	scaleBarLegend <- annotate("text", label = paste(laScaleBar$legend[,"text"], dist.unit, sep=""), 
	                           x = laScaleBar$legend[,"long"], y = laScaleBar$legend[,"lat"], 
	                           size = legend.size, colour = legend.colour)
	
	res <- list(rectangle1, rectangle2, scaleBarLegend)
	
	return(res)
}
